# Paper source: J. C. Dunn,
# "A Fuzzy Relative of the ISODATA Process and Its Use in Detecting
# Compact Well-Separated Clusters", Journal of Cybernetics 3 (3) (1973) 32–57.

# Min number of clusters = 2
# The time complexity is O(nk), where n is the number of data points
# and k is the number of clusters.
# To estimate the number of clusters, pick the k that maximizes the Dunn index.


import numpy as np
from scipy.spatial.distance import cdist
from scipy.spatial.distance import squareform
def show(dataSet, k, centroids, clusterAssment):
    """Plot the first two columns of every sample and of the first k centroids.

    Args:
        dataSet: 2-D array indexed as ``dataSet[i, 0]`` / ``dataSet[i, 1]``.
        k: number of centroids to draw.
        centroids: 2-D array indexed as ``centroids[i, 0]`` / ``centroids[i, 1]``.
        clusterAssment: per-sample cluster label; ``int(clusterAssment[i])``
            selects the marker style of sample ``i``.

    Only ten marker styles are defined for each group, so styles are reused
    cyclically when a label or ``k`` goes past ten (the previous behaviour
    was an IndexError).
    """
    # Imported lazily so the module can be used without matplotlib installed.
    from matplotlib import pyplot as plt

    sample_marks = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    centroid_marks = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']

    for i in range(dataSet.shape[0]):
        cluster = int(clusterAssment[i])
        # Modulo keeps the lookup in range for any label (a label of -1
        # still maps to the last style, exactly as negative indexing did).
        style = sample_marks[cluster % len(sample_marks)]
        plt.plot(dataSet[i, 0], dataSet[i, 1], style)

    for i in range(k):
        style = centroid_marks[i % len(centroid_marks)]
        plt.plot(centroids[i, 0], centroids[i, 1], style, markersize=12)

    plt.show()
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: array-like of numbers.
        vecB: array-like of numbers, broadcast-compatible with ``vecA``.

    Returns:
        The square root of the summed squared element-wise differences.
    """
    # The bare names sqrt/sum/power were never imported in this module, so
    # the NumPy functions are referenced explicitly through ``np``.
    diff = np.asarray(vecA, dtype=float) - np.asarray(vecB, dtype=float)
    return np.sqrt(np.sum(np.power(diff, 2)))
def extract_cluster(densitySortArr,closestNodeIdArr, classNum,gamma):
    """Label every point by following its nearest denser neighbour.

    Args:
        densitySortArr: point indices ordered from lowest to highest density.
        closestNodeIdArr: for each point, the index of the nearest point of
            higher density.
        classNum: number of cluster centres to pick.
        gamma: per-point score; the ``classNum`` largest become the centres.

    Returns:
        ``(corePoints, labels)``: the indices of the chosen centres and the
        label array (``-1`` marks a point that never received a label).
    """
    UNASSIGNED = -1
    total = densitySortArr.shape[0]
    labels = np.full((total,), UNASSIGNED)

    # The classNum points with the largest gamma are the cluster centres;
    # centre number i (in descending-gamma order) gets label i.
    ranked_by_gamma = np.argsort(-gamma)
    corePoints = ranked_by_gamma[:classNum]
    labels[corePoints] = range(len(corePoints))

    # Walk from the densest point down to the sparsest, so a point's denser
    # neighbour has already been visited when the point itself is reached.
    for node in reversed(densitySortArr.tolist()):
        if labels[node] != UNASSIGNED:
            continue
        # Inherit the label of the nearest point with higher density.
        parent = closestNodeIdArr[node]
        labels[node] = labels[parent]

    return corePoints, labels

def CFSFDP(data,dc):
    """Compute the density-peak quantities used to pick cluster centres.

    Args:
        data: 2-D array with one sample per row.
        dc: cut-off radius; a sample's density is the number of samples
            (itself included) whose Euclidean distance to it is below ``dc``.

    Returns:
        A tuple ``(densityArr, densitySortArr, closestDisOverSelfDensity,
        closestNodeIdArr, gamma)``:

        * ``densityArr`` - density of every sample.
        * ``densitySortArr`` - sample indices sorted by ascending density.
        * ``closestDisOverSelfDensity`` - distance from each sample to the
          nearest sample that comes later in ``densitySortArr``.
        * ``closestNodeIdArr`` - index of that nearest sample (the last
          sample in ``densitySortArr`` points to itself).
        * ``gamma`` - product of the min-max normalised density and distance.
    """
    n = data.shape[0]
    # Full symmetric matrix of pairwise Euclidean distances.
    disMat = squareform(pdist(data, metric='euclidean'))

    # Density: how many samples lie strictly within radius dc (the zero
    # diagonal means every sample counts itself).
    densityArr = np.where(disMat < dc, 1, 0).sum(axis=1)
    densitySortArr = np.argsort(densityArr)

    closestDisOverSelfDensity = np.zeros((n,))
    closestNodeIdArr = np.zeros((n,), dtype=np.int32)

    # Visit samples from lowest to highest density; everything after the
    # current position in the sort order is treated as "denser".
    for rank, nodeId in enumerate(densitySortArr):
        denserIds = densitySortArr[rank + 1:]
        if denserIds.size == 0:
            # Densest sample: it has no denser neighbour, so it gets the
            # largest distance recorded so far and points to itself.
            closestDisOverSelfDensity[nodeId] = np.max(closestDisOverSelfDensity)
            closestNodeIdArr[nodeId] = nodeId
            continue
        distToDenser = disMat[nodeId][denserIds]
        # argmin returns the first minimum, i.e. the first of several
        # equally near samples; the position is relative to denserIds.
        nearest = np.argmin(distToDenser)
        closestDisOverSelfDensity[nodeId] = distToDenser[nearest]
        closestNodeIdArr[nodeId] = denserIds[nearest]

    # Density and distance can differ by orders of magnitude, so both are
    # min-max normalised before being multiplied. epsilon keeps BOTH
    # denominators non-zero; without it a constant distance array produced
    # 0/0 = NaN in gamma.
    epsilon = 1e-9
    denRange = np.max(densityArr) - np.min(densityArr) + epsilon
    normal_den = (densityArr - np.min(densityArr)) / denRange

    disRange = (np.max(closestDisOverSelfDensity)
                - np.min(closestDisOverSelfDensity) + epsilon)
    normal_dis = (closestDisOverSelfDensity
                  - np.min(closestDisOverSelfDensity)) / disRange

    gamma = normal_den * normal_dis

    return densityArr,densitySortArr,closestDisOverSelfDensity,closestNodeIdArr,gamma
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

def dunn(pairwise_distances, labels):
    """Return the Dunn index of a labelling.

    The index is the smallest distance between two samples of different
    clusters divided by the largest distance between two samples of the
    same cluster.

    Args:
        pairwise_distances: square matrix of sample-to-sample distances.
        labels: array with one cluster label per sample.

    Returns:
        The ratio described above. The denominator is floored at 1e-16 so
        that a zero diameter does not divide by zero.
    """
    smallest_separation = np.inf
    largest_diameter = 0

    for cluster in np.unique(labels):
        inside = labels == cluster
        outside = labels != cluster
        # Rows belonging to the current cluster, reused for both slices.
        member_rows = pairwise_distances[inside, :]

        # Separation: nearest sample that lies in any other cluster.
        cross = member_rows[:, outside]
        if cross.size > 0:
            smallest_separation = min(smallest_separation, cross.min())

        # Diameter: farthest pair inside the cluster. Zero distances
        # (a sample to itself, or exact duplicates) are left out.
        within = member_rows[:, inside]
        within = within[within > 0]
        if within.size > 0:
            largest_diameter = max(largest_diameter, within.max())

    return smallest_separation / np.fmax(largest_diameter, 1.0e-16)
import xlwt
def main11(data, dc, output_path=None):
    """Estimate the number of clusters as the k with the largest Dunn index.

    Every k from 2 up to (but not including) ``ceil(sqrt(n_samples))`` is
    clustered with ``CFSFDP`` + ``extract_cluster`` and scored with ``dunn``.
    Each score is also written to column 1, row k, of an xlwt sheet.

    Args:
        data: 2-D array with one sample per row.
        dc: cut-off radius forwarded to ``CFSFDP``.
        output_path: optional file name (or stream) passed to
            ``Workbook.save``. When ``None`` (the default) the workbook is
            only built in memory and nothing is written to disk.

    Returns:
        The selected k as an ``int``; it is printed as well.

    Raises:
        ValueError: if there are too few samples for any candidate k.
    """
    minK = 2
    maxK = int(np.ceil(data.shape[0] ** 0.5))
    if maxK <= minK:
        # With fewer than 5 samples range(minK, maxK) is empty; fail with
        # a clear message instead of an opaque error from an empty array.
        raise ValueError(
            'main11 needs at least 5 samples to evaluate any k (got %d)'
            % data.shape[0]
        )

    book = xlwt.Workbook()
    sheet1 = book.add_sheet(u'sheet1', cell_overwrite_ok=True)

    index = np.zeros((maxK - minK))
    pairwise_distances = cdist(data, data)
    # The density-peak quantities do not depend on k, so compute them once.
    _, densitySortArr, _, closestNodeIdArr, gamma = CFSFDP(data, dc)

    for k in range(minK, maxK):
        _, labels = extract_cluster(densitySortArr, closestNodeIdArr, k, gamma)
        index[k - minK] = dunn(pairwise_distances, labels)
        sheet1.write(k, 1, index[k - minK])

    if output_path is not None:
        book.save(output_path)

    est_k = int(index.argmax() + minK)
    print('Selected k =', est_k)
    return est_k



